# ----------------------------------------
#
# Replication files
# Catalinac, Amy 
# Dominance Through Division: Group-based Clientelism in Japan
# 
#-----------------------------------------






#-----------------------------------------
# CHAPTER 8
#
# #-----------------------------------------








#-----------------------------------------
# PREPARATION

setwd("C:/Users/ac6037/Dropbox/TARGETING/replication2018/ANALYSIS OF MASTER DATA")
options(max.print=1000000)

dat <- readRDS("Master_plus_Snow_Turn_Trans_Dis6.rds")

elec.dat <- dat[!is.na(dat$hor_electoral_district),]

library(dplyr)
library(readstata13)
library(stargazer)
library(lmtest)
library(plm)
library(clubSandwich)
library(multiwayvcov)
library(car)
library(ggplot2)







# -----------------------------------------------------------

# This function conducts regressions on panel data, allowing different FE to be specified and different clustering of SEs,
# and adjusts the clustering so that it is done the way Stata does:

stata_plm <- function(formula, data,
                      model = "pooling", effect = "individual",
                      cluster){ # NB: cluster variable has to be a character string specifying the name of the variable we want to
  # cluster on.  It also must be in either of the indices of pdata.frame.
  
  # Plus, we will build in sanity checks: first, is the "cluster" argument a character string?
  if(class(cluster) != "character"){
    cat("Error: argument 'cluster' must be a character string that specifies the name of the cluster variable.")
    
    # Second, is "cluster" argument in either one of the indices of the pdata.frame?
  } else if(!(cluster %in% names(index(data)))){
    cat("Error: clustering variable must be either of indices of the pdata.frame")
  } else{
    
    # Use the cluster argument to specify the "cluster" argument of vcovHC ( which must be either of "group" and "time"):
    if(which(cluster == names(index(data))) == 1){
      cluster_index <- "group"
    } else if (which(cluster == names(index(data))) == 2){
      cluster_index <- "time"
    } else{
      # Finally, just to make sure the clustering variable is in the indices:
      cat("Error: clustering variable not found in the indices.")
    }
    
    # Estimate the regression with plm:
    reg <- plm(formula, data, model = model, effect = effect)
    # Adjust the clustered SEs the way Stata does them:
    len_clst <- length(unique(data[,cluster]))
    vcov <- vcovHC(reg, type = "HC1", method = "arellano",
                   cluster = cluster_index) * (len_clst/(len_clst-1))
    reg$vcov <- vcov
    return(reg)
  }
}





# ----------------------------------------------
# TABLE A.15
#
# descriptive statistics for Chapter 8
#
# ----------------------------------------------

pre.red <- subset(elec.dat, is.na(elec.dat$muncode_num) == F & elec.dat$tmt_possible==1)

table(pre.red$year)

pre.red2 <- pre.red[,c("mun_turnout", "Lmun_turnout",
                       "vs_single_LDPw", "rvs_single_LDPw", 
                       "vs_all_LDPw", "vs_all_LDPc", "vs_single_LDPl", "vs_single_nonLDPIw", "vs_single_DPJw", 
                       "marginvs",
                       "mun_ceif", "needy_pc", "primary_pc", "lnpop", "logincome_pc", "mun_population_density")]

stargazer(pre.red2)

# Note: I use the same sample as in Chapter 6, so I focus on non-split
# municipalities in tournament-impossible districts.  I also exclude municipalities
# for which I don't have fiscal and demographic variables for, which can be because 
# a municipality merged, so did not exist for the full fiscal year.










# --------------------------------------------------------------
# TABLE 8.1 
#
# \ref{turnout1}
# # --------------------------------------------------------------

controls <- "mun_ceif + needy_pc + primary_pc + lnpop + logincome_pc + mun_population_density"

pre.red <- subset(elec.dat, is.na(elec.dat$muncode_num) == F &
                    elec.dat$tmt_possible==1)

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "district_year"))

# Model 1
mod1 <- stata_plm(paste("mun_turnout ~ vs_single_LDPw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "time", cluster ="muncode_num")
summary(mod1)

# Model 2
mod2 <- stata_plm(paste("mun_turnout ~ vs_single_LDPw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "twoways", cluster ="muncode_num")
summary(mod2)

# Model 3
mod3 <- stata_plm(paste("mun_turnout ~ rvs_single_LDPw + Lmun_turnout +",
                         controls), pre.red.pdf,
                   model = "within", effect = "time", cluster ="muncode_num")
summary(mod3)

# Model 4

mod4 <- stata_plm(paste("mun_turnout ~ rvs_single_LDPw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "twoways", cluster ="muncode_num")
summary(mod4)

# ---------------------------------
## print table
stargazer(mod1, mod2, mod3, mod4,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)



# ---------------------------------
# Substantive effect

newdat <- mod1$model
mean(na.omit(newdat$mun_turnout))
sd(na.omit(newdat$vs_single_LDPw))

mod1$coefficients
sd(na.omit(newdat$vs_single_LDPw)) * mod1$coefficients[1]
# in percentage:
(sd(na.omit(newdat$vs_single_LDPw)) * mod1$coefficients[1])*100











# -------------------------------------------
# TABLE 8.2 
#
# \ref{turnout1pre} 
# -------------------------------------------

controls <- "mun_ceif + needy_pc + primary_pc + lnpop + logincome_pc + mun_population_density"

pre.red <- subset(elec.dat, elec.dat$year <= 1993 & is.na(elec.dat$muncode_num) == F
                  & elec.dat$tmt_possible==1)

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "district_year"))

mod5 <- stata_plm(paste("mun_turnout ~ vs_single_LDPw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "time", cluster ="muncode_num")
summary(mod5)

mod6 <- stata_plm(paste("mun_turnout ~ vs_single_LDPw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "twoways", cluster ="muncode_num")
summary(mod6)

mod7 <- stata_plm(paste("mun_turnout ~ rvs_single_LDPw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "time", cluster ="muncode_num")
summary(mod7)

mod8 <- stata_plm(paste("mun_turnout ~ rvs_single_LDPw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "twoways", cluster ="muncode_num")
summary(mod8)

# -------------------------------------
# print table
# 
stargazer(mod5, mod6, mod7, mod8,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)

# -------------------------------------
# Substantive effect (mod5)

newdat <- mod5$model
mean(na.omit(newdat$mun_turnout))
sd(na.omit(newdat$vs_single_LDPw))
(sd(na.omit(newdat$vs_single_LDPw)) * mod5$coefficients[1])*100








# -------------------------------------------
# TABLE 8.5
#
# \ref{turnout1post}
# -------------------------------------------

controls <- "mun_ceif + needy_pc + primary_pc + lnpop + logincome_pc + mun_population_density"

pre.red <- subset(elec.dat, elec.dat$year > 1993 & is.na(elec.dat$muncode_num) == F
                   & elec.dat$tmt_possible==1)

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "district_year"))

mod30 <- stata_plm(paste("mun_turnout ~ vs_single_LDPw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "time", cluster ="muncode_num")
summary(mod30)

mod31 <- stata_plm(paste("mun_turnout ~ rvs_single_LDPw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "time", cluster ="muncode_num")
summary(mod31)

# print table
stargazer(mod30, mod31,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)

# substantive effect
newdat <- mod30$model
mean(na.omit(newdat$mun_turnout))
sd(na.omit(newdat$vs_single_LDPw))
(sd(na.omit(newdat$vs_single_LDPw)) * mod30$coefficients[1])*100
# 0.972072 (rounded to "1%" in text; 0.98% is more accurate)









# --------------------------------------------
# TABLE 8.4 
#
# \ref{turnoutAEpre}
# --------------------------------------------

controls <- "mun_ceif + needy_pc + primary_pc + lnpop + logincome_pc + mun_population_density"

pre.red <- subset(elec.dat, elec.dat$year <= 1993 & is.na(elec.dat$muncode_num) == F
                  & elec.dat$tmt_possible==1)

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "district_year"))



# --------------------------------
# Placebo: its not all LDP winners

mod20 <- stata_plm(paste("mun_turnout ~ vs_all_LDPw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "twoways", cluster ="muncode_num")
summary(mod20)

# --------------------------------
# Placebo: its not all LDP candidates

mod21 <- stata_plm(paste("mun_turnout ~ vs_all_LDPc + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "twoways", cluster ="muncode_num")
summary(mod21)

# --------------------------------
# Placebo: its not a single LDP loser

mod22 <- stata_plm(paste("mun_turnout ~ vs_single_LDPl + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "twoways", cluster ="muncode_num")
summary(mod22)

# --------------------------------
# Placebo: its not votes for a single non LDP (and LDPI) winner

mod23 <- stata_plm(paste("mun_turnout ~ vs_single_nonLDPIw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "twoways", cluster ="muncode_num")
summary(mod23)



# --------------------------------
# print table

stargazer(mod20, mod21, mod22, mod23,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)











# -----------------------------------
# TABLE 8.6
#
# \ref{turnoutAEpost}
#
# -----------------------------------


# -----------------------------------
# Model 1
# Municipalities in post-1996 period in districts without LDP winners

controls <- "mun_ceif + needy_pc + primary_pc + lnpop + logincome_pc + mun_population_density"

pre.red <- subset(elec.dat, elec.dat$year > 1993 & is.na(elec.dat$muncode_num) == F
                   & elec.dat$cand_01_pty!="LDP" & elec.dat$tmt_possible==1)

length(unique(pre.red$district_year))

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "district_year"))

mod50 <- stata_plm(paste("mun_turnout ~ vs_single_LDPl + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "time", cluster ="muncode_num")
summary(mod50)

# -----------------------------------
# Model 2
# Municipalities in post-1996 period in districts without LDP winners but with
# LDP losers resurrected via PR

pre.red <- subset(elec.dat, elec.dat$year > 1993 & is.na(elec.dat$muncode_num) == F 
                   & elec.dat$cand_01_pty!="LDP"
                   & elec.dat$resurrectSSD==1
                   & elec.dat$tmt_possible==1)

length(unique(pre.red$district_year))

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "district_year"))

mod51 <- stata_plm(paste("mun_turnout ~ vs_single_LDPl + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "time", cluster ="muncode_num")
summary(mod51)

# -----------------------------------
# Model 3
# Municipalities in post-1996 period in districts with DPJ winners

pre.red <- subset(elec.dat, elec.dat$year > 1993 & is.na(elec.dat$muncode_num) == F
                   & elec.dat$cand_01_pty=="DPJ"
                   & elec.dat$tmt_possible==1)

length(unique(pre.red$district_year))

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "district_year"))

mod52 <- stata_plm(paste("mun_turnout ~ vs_single_DPJw + Lmun_turnout + ",
                         controls), pre.red.pdf,
                   model = "within", effect = "time", cluster ="muncode_num")
summary(mod52)



# -----------------------------
# print table

stargazer(mod50, mod51, mod52,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)









# --------------------------------------
# TABLE A.16
# 
# \ref{turnout_convex}
# --------------------------------------

controls <- "mun_ceif + needy_pc + primary_pc + lnpop + logincome_pc + mun_population_density"

pre.red <- subset(elec.dat, is.na(elec.dat$muncode_num) == F & 
                     elec.dat$tmt_possible==1)

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "district_year"))

cubic.mod.full <- stata_plm(paste("mun_turnout ~ rvs_single_LDPw  +
                               I(rvs_single_LDPw^2) + 
                               I(rvs_single_LDPw^3) + ",
                                  controls), data = pre.red.pdf,
                            model = "within", effect = "time", cluster = "muncode_num")
summary(cubic.mod.full)

linearHypothesis(cubic.mod.full, c("rvs_single_LDPw", "I(rvs_single_LDPw^2)", "I(rvs_single_LDPw^3)"))

stargazer(cubic.mod.full,
          no.space = T,
          omit.stat = c("f", "adj.rsq"),
          star.cutoffs = c(0.05, 0.01, 0.001))



# -------------------------------------------------
# Substantive effects

newdat <- cubic.mod.full$model
summary(newdat$rvs_single_LDPw)

# Which indices are the coefficients on rvs_single_LDPw, 
# rvs_single_LDPw, and rvs_single_LDPw in?
coef(cubic.mod.full) 
# they are in 1, 2, 3.

# A function to estimate the predicted effect from a cubic independent variable:

cubic_effect <- function(model, indep_rows = c(1,2,3), interval = 0.05, start = NULL, spec_points = NULL){
  ### Arguments
  ## "model" requires an lm or plm object
  ## "indep_rows" asks the indices of coefficients for the linear, square, and cubic independent variables
  ## "interval" specifies an interval for the grid of an independent variable
  ## "start" specifies the starting point of an interval for the grid of an independent variable.
  ##          If you leave this blank, it will be set at the minimum value
  ## "spec_points" should be a numeric vector of length 2. If you do not want a grid, but just the difference between
  ##          two points, e.g. between VShare 0.9 and 0.95, you can put these two values.
  
  ##  Extract coefficients for the cubic variable
  coefs <- coef(model)[indep_rows]
  
  ## If spec_points is not specified, estimation is done for a grid
  if(is.null(spec_points)){
    ## If start is not specified, the starting point will be the minimum
    if(is.null(start)){
      ## Range of the independent variable
      grid_range = range(model$model[,indep_rows[1] + 1])
    } else{
      ## If start is given, the range will be from "start" to the maximum
      grid_range = c(start, max(model$model[, indep_rows[1] + 1]))
    }
    ## grid is made according to the range with the interval specified by "interval"
    grid <- seq(grid_range[1], grid_range[2], by = interval)
  } else{
    grid <- spec_points
  }
  ## Squared and cubic grids are added
  grid_cubic <- rbind(t(grid), t(grid^2))
  grid_cubic <- rbind(grid_cubic, t(grid^3))
  
  ## Effects from these three variables are aggregated
  linear_pred <- t(grid_cubic) %*% coefs
  ## Row names are the grid points
  row.names(linear_pred) <- grid_cubic[1,]
  return(linear_pred)
}

two_points_top <- cubic_effect(cubic.mod.full, spec_points = c(0.95, 1))
two_points_top
(two_points_top[2] - two_points_top[1])*100

two_points_mid <- cubic_effect(cubic.mod.full, spec_points = c(0.8, 0.85))
(two_points_mid[2] - two_points_mid[1])*100

two_points_mid <- cubic_effect(cubic.mod.full, spec_points = c(0.5, 0.55))
(two_points_mid[2] - two_points_mid[1])*100








# ------------------------------------------
# FIGURE 8.1 
#
# \label{rvs_single_LDPw}
# -------------------------------------------

pre.red <- subset(elec.dat, is.na(elec.dat$muncode_num) == F & 
                    elec.dat$tmt_possible==1)

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "district_year"))

cubic.mod.full.lm <- lm(paste("mun_turnout ~ rvs_single_LDPw  + 
                              I(rvs_single_LDPw^2) + 
                              I(rvs_single_LDPw^3) + ", 
                              controls, "+ district_year"), data = pre.red.pdf)

cl_stata <- function(fm, cluster, dfadj, vcov=FALSE) {
  library(sandwich)
  library(lmtest)
  library(zoo)
  ## number of clusters
  M <- length(unique(cluster))
  ## num of obs
  N <- length(cluster)
  ## Stata style correction.
  ## dfadj is used to discount the loss of DF with respect to LEyear_district.
  dfc <- (M/(M-1))*((N-1)/(N-fm$rank + dfadj))
  ## calculation of VCOV
  u <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum))
  vcovCL <-dfc * sandwich (fm, meat = crossprod(u)/N)
  if(vcov==TRUE){
    return(vcovCL)
  } else{
    coeftest(fm, vcovCL)
  }
}
mod <- cubic.mod.full.lm$model

## clustered VCOV
clvcov <- cl_stata(cubic.mod.full.lm, mod$district_year, dfadj=length(unique(mod$district_year))+1, vcov = T)

## A function to calculate the mode:
getmode <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

## Calculate the marginal prediction at means.
## All the covariates except for district_year are fixed at their means.
## For district_year, I use the mode of it.

colnames(mod)
atmean <- c()
for (i in 1:(ncol(mod)-1)) {
  atmean <- c(atmean, mean(as.numeric(mod[,i])))
  print(i)
}

## For district_year, mode is used.
atmean <- c(atmean, as.character(getmode(mod$district_year)))
names(atmean) <- colnames(mod)
atmean

preddat <- data.frame()
for (i in 1:101) {
  this <- as.numeric(atmean[-c(1,3,4, 11)]) # NB; playing around with controls() requires
  # adjustment to this line of code (var in last column is district_year)
  this <- c(this, atmean[11])
  this[1] <- (i-1)/100
  names(this) <- c("rvs_single_LDPw", "mun_ceif", "needy_pc", "primary_pc", "lnpop", 
                   "logincome_pc", "mun_population_density", "district_year")
  preddat <- rbind(preddat, as.data.frame(t(this)))
}

for (j in 1:(ncol(preddat)-1)) {
  preddat[,j] <- as.numeric(as.character(preddat[,j]))
}

## car::Predict allows us to calculate confidence intervals with any given VCOV.
par(mfrow = c(1, 1), mar = c(4,4,2,1), tcl = -0.25, mgp = c(1.75, 0.6, 0),
    font.main = 1, cex.main = 2)
pred_margin3 <- Predict(cubic.mod.full.lm, newdata = preddat, interval = "confidence", vcov. = clvcov)
matplot(pred_margin3, type = "l",  lwd = 3, cex = 1.5, cex.lab = 1.7, 
        col = c(1,1,1), xaxt = "n", xlab = "Rank(Single LDP Winner VS)", ylab = "Turnout Rate")
axis(1, at= c(0,20,40,60,80,100), labels = c(0, .2, .4, .6, .8, 1))











# ------------------------------------------------
# TABLE 8.7 
#
# \ref{marginality}
# -------------------------------------------------

controls <- "mun_ceif + needy_pc + primary_pc + lnpop + logincome_pc + mun_population_density"

controlsd <- paste(controls, " + HI + logNum + DISmal + numLDPcand + ncands_electoral_district + totseat_in_electoral_district")

pre.red <- subset(elec.dat, is.na(elec.dat$muncode_num) == F & elec.dat$tmt_possible==1)

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "year"))



# ----------------------------------------------
# Model 1

close1 <- stata_plm(paste("mun_turnout ~ marginvs + Lmun_turnout + ",
                          controlsd), pre.red.pdf,
                    model = "within", effect = "twoways", cluster = "muncode_num")
summary(close1)

# substantive effect of marginvs

newdat <- close1$model
mean(na.omit(newdat$mun_turnout))
sd(na.omit(newdat$marginvs))
(sd(na.omit(newdat$marginvs)) * close1$coefficients[1])*100





# ----------------------------------------------
# Model 2

closei3 <- stata_plm(paste("mun_turnout ~ marginvs*rvs_single_LDPw + Lmun_turnout + ",
                           controlsd), pre.red.pdf,
                     model = "within", effect = "twoways", cluster = "muncode_num")
summary(closei3)

# substantive effect:

newdat <- closei3$model

coef(closei3)
# marginvs in #1, marginvs:rvs_single_LDPw in #16

# Effect of a 1-unit increase in Margin on municipalities whose rank is 1:
rank1coef <- closei3$coefficients[1] + closei3$coefficients[16]*1

# Effect of a 1-unit increase in Margin on municipalities whose rank is 0:
rank0coef <- closei3$coefficients[1] + closei3$coefficients[16]*0

# To translate these into actual amounts, multiply these coefficients 
# by one sd of margin, then multiply by 100 to get percent change

(rank1coef*sd(newdat$marginvs))*100

(rank0coef*sd(newdat$marginvs))*100


# ---------------------------
# print table

stargazer(close1, closei3,
          no.space = T,
          omit.stat = c("f", "adj.rsq"),
          star.cutoffs = c(0.05, 0.01, 0.001))






# ------------------------------------------
# FIGURE 8.2 
#
# \label{marginaleffect_rvs_single_LDPwfull}
# ------------------------------------------

# Marginal effects of closeness variable at different levels of vote concentration.
# Arguments: model name, positions of constitutive and interaction terms, 
# confidence level, and the label you want to use:
getME <- function(model, var1, var2, int, ci, modellabel){
  alpha <- 1-ci
  z <- qnorm(1-alpha/2)
  beta.hat <- coef(model)
  cov <- vcovHC(model, type="sss", cluster="group")
  z0 <- seq(min(model.frame(model)[,var2],na.rm=T),
            max(model.frame(model)[,var2],na.rm=T),
            length.out=1000)
  dy.dx <- beta.hat[var1] + beta.hat[int]*z0
  se.dy.dx <- sqrt(cov[var1,var1] + z0^2*cov[nrow(cov),ncol(cov)] + 2*z0*cov[var1,ncol(cov)])
  upr <- dy.dx + z*se.dy.dx
  lwr <- dy.dx - z*se.dy.dx
  medf <- data.frame(label=modellabel,
                     x=z0,
                     est=dy.dx,
                     lwr=lwr,
                     upr=upr)
  return(medf)
}

library(ggplot2)

coef(closei3)
closei3ME <- getME(closei3, 1, 2, 16, 0.95, "Margin")
ggplot(data = closei3ME, aes(x = x, y = est)) +
  geom_hline(yintercept = 0, lty = 3) +
  geom_line(lwd = 1.1) +
  geom_ribbon(aes(ymin = lwr, ymax = upr), alpha=0.2) +
  #geom_line(aes(x = z0, y = upr), lty = 2) +
  #geom_line(aes(x = z0, y = lwr), lty = 2) +
  xlab("Rank(Single LDP Winner VS)") + 
  ylab("Average Marginal Effect of Vote Margin") +
  theme_bw() + 
  theme(axis.title.x = element_text(size = 17),
        axis.title.y = element_text(size = 17))

# saved as marginaleffect_rvs_single_LDPw table \ref{marginality}












# ------------------------------------------------
# TABLE 8.8  
#
# \label{marginality2}
# ----------------------------------------------

controls <- "mun_ceif + needy_pc + primary_pc + lnpop + logincome_pc + mun_population_density"

controlsd <- paste(controls, " + HI + logNum + DISmal + numLDPcand + ncands_electoral_district + totseat_in_electoral_district")

pre.red <- subset(elec.dat, is.na(elec.dat$muncode_num) == F & elec.dat$tmt_possible==1)

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "year"))



# -----------------------------------------
# Model 1 (their interaction)

closei4 <- stata_plm(paste("mun_turnout ~ marginvs*mun_population_density + Lmun_turnout + ",
                           controlsd), pre.red.pdf,
                     model = "within", effect = "twoways", cluster = "muncode_num")
summary(closei4)




#-------------------------------------
# substantive effect of Model 1

newdat <- closei4$model

coef(closei4)
# marginvs in #1, marginvs:rvs_single_LDPw in #15

min(newdat$mun_population_density)*1000
max(newdat$mun_population_density)*1000

# Effect of a 1-unit increase in Margin in municipalities that are least dense
ldensecoef <- closei4$coefficients[1] + closei4$coefficients[15]*min(newdat$mun_population_density)
ldensecoef

# Effect of a 1-unit increase in Margin in municipalities that are most dense:
mdensecoef <- closei4$coefficients[1] + closei4$coefficients[15]*max(newdat$mun_population_density)
mdensecoef

# To translate these into amounts, multiply these coefficients by 1 sd of margin
# and multiply by 100 to get percent change

(ldensecoef*sd(newdat$marginvs))*100

(mdensecoef*sd(newdat$marginvs))*100






# -----------------------------------------
# Model 2 (their interaction, plus ours)

closei5 <- stata_plm(paste("mun_turnout ~ marginvs*mun_population_density + marginvs*rvs_single_LDPw + Lmun_turnout + ",
                           controlsd), pre.red.pdf,
                     model = "within", effect = "twoways", cluster = "muncode_num")
summary(closei5)






#-------------------------------------
# substantive effect of Model 2

newdat <- closei5$model

# mean of pop density
mean(newdat$mun_population_density)*1000
# 578 (book says 559)

coef(closei5)
# marginvs in #1
# marginvs:mun_population_density # 16
# marginvs:rvs_single_LDPw in #17


# Effect of a 1-unit increase in margin on municipalities whose rank is 1 
# and are at mean pop density:
rank1coef <- closei5$coefficients[1] + 
  closei5$coefficients[16]*mean(newdat$mun_population_density) + 
  closei5$coefficients[17]*1

# in percent:
(rank1coef*sd(newdat$marginvs))*100

# Effect of a 1-unit increase in margin on municipalities whose rank is 0 
# and are at mean pop density:
rank0coef <- closei5$coefficients[1] + closei5$coefficients[16]*mean(newdat$mun_population_density) + closei5$coefficients[17]*0

(rank0coef*sd(newdat$marginvs))*100


# --------------------------------
# print table

stargazer(closei4, closei5,
          no.space = T,
          omit.stat = c("f", "adj.rsq"),
          star.cutoffs = c(0.05, 0.01, 0.001))










# ------------------------------------------------
# TABLE 8.3 \ref{turnoutpreFD}

pre.red <- subset(elec.dat, elec.dat$year <= 1993 & is.na(elec.dat$muncode_num) == F & elec.dat$tmt_possible==1)

pre.red <- pre.red %>% mutate(cat_year = as.numeric(as.factor(year)))

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "cat_year"))

# Get the variables with which we are going to create first differenced vars:

pre.red.pdf2 <- pre.red.pdf[,c("mun_turnout", 
                               "marginvs", 
                               "vs_single_LDPw", "rvs_single_LDPw",
                               "vs_single_c", "vs_all_LDPc", "vs_all_LDPw",
                               "mun_ceif", "needy_pc", "primary_pc", "lnpop", "logincome_pc","mun_population_density", 
                               "DISceif", "DISneedy", "DISprimary", "logDISpop", "logDISincomepc", "DISdensity", 
                               "HI", "logNum", "DISmal", "ldpsenior", "numLDPcand", 
                               "totseat_in_electoral_district", "ncands_electoral_district",
                               "muncode_num","cat_year", "district_year")]

# Take first differences of first columns:
diff_pdat <- pre.red.pdf2
for (c in 1:26) {
  diff_pdat[,c] <- diff(diff_pdat[,c])
}

# Add lagged versions of variables (attach the prefix lag.)  These variables are from the 
# previous *election", not the previous "year".
# Add values of variables at time t (attach the prefix curr.)
# Can add them directly from pre.red.pdf as observations are in the same order.

diff_pdat <- diff_pdat %>% mutate(
  
  # lagged versions:
  
  lag.year = NA,
  lag.mun_turnout = NA,
  lag.vs_single_LDPw = NA,
  lag.rvs_single_LDPw = NA,
  
  lag.vs_single_c = NA,
  lag.vs_all_LDPc = NA,
  lag.vs_all_LDPw = NA,
  
  lag.district_year = NA,
  lag.marginvs = NA,
  
  # current versions of variables:
  
  curr.year = pre.red.pdf$year,
  curr.mun_turnout = pre.red.pdf$mun_turnout,
  
  curr.vs_single_LDPw = pre.red.pdf$vs_single_LDPw,
  curr.rvs_single_LDPw = pre.red.pdf$rvs_single_LDPw,
  
  curr.vs_single_c = pre.red.pdf$vs_single_c,
  curr.vs_all_LDPc = pre.red.pdf$vs_all_LDPc,
  curr.vs_all_LDPw = pre.red.pdf$vs_all_LDPw,
  
  curr.mun_ceif = pre.red.pdf$mun_ceif,
  curr.needy_pc = pre.red.pdf$needy_pc,
  curr.primary_pc = pre.red.pdf$primary_pc,
  curr.lnpop = pre.red.pdf$lnpop,
  curr.logincome_pc = pre.red.pdf$logincome_pc,
  curr.mun_population_density = pre.red.pdf$mun_population_density,
  
  curr.DISceif =  pre.red.pdf$DISceif,
  curr.DISneedy = pre.red.pdf$DISneedy,
  curr.DISprimary = pre.red.pdf$DISprimary,
  curr.logDISpop = pre.red.pdf$logDISpop,
  curr.logDISincomepc = pre.red.pdf$logDISincomepc,
  curr.DISdensity = pre.red.pdf$DISdensity,
  
  curr.logNum = pre.red.pdf$logNum,
  curr.DISmal = pre.red.pdf$DISmal,
  curr.ldpsenior = pre.red.pdf$ldpsenior,
  curr.numLDPcand = pre.red.pdf$numLDPcand,
  curr.totseat_in_electoral_district = pre.red.pdf$totseat_in_electoral_district,
  curr.ncands_electoral_district = pre.red.pdf$ncands_electoral_district
)

for (var in c("lag.year", "lag.mun_turnout", 
              "lag.vs_single_LDPw", "lag.rvs_single_LDPw",
              "lag.vs_single_c", "lag.vs_all_LDPc", "lag.vs_all_LDPw", 
              "lag.district_year", "lag.marginvs")) {
  orig_var <- sub("lag.", "", var)
  diff_pdat[,var] <- lag(pre.red.pdf[,orig_var])
}

# ----------------------------------------------
# Let us try to reassign the index to include district_year, and then we can run the regression
# and get correct R squared:

diff_pdat2 <- as.data.frame(diff_pdat)
diff_pdat3 <- pdata.frame(diff_pdat2, index = c("muncode_num", "district_year"))

controls <- "mun_ceif + needy_pc + primary_pc + lnpop + logincome_pc + mun_population_density"

# ------------------------------------------------------

diff1 <- stata_plm(paste("mun_turnout ~ vs_single_LDPw + ",
                         controls), data = diff_pdat3,
                   model = "within", effect = "twoways", cluster = "muncode_num")
summary(diff1)

diff2 <- stata_plm(paste("mun_turnout ~ rvs_single_LDPw + ",
                         controls), data = diff_pdat3,
                   model = "within", effect = "twoways", cluster = "muncode_num")
summary(diff2)

# table \ref{turnoutpreFE}

stargazer(diff1, diff2, 
          no.space = T,
          omit.stat = c("f", "adj.rsq"),
          star.cutoffs = c(0.05, 0.01, 0.001))



